########################################################################
# DoPowerSimulations.R
# This file generates the simulations used for the power analysis figures
# In the SI. 
# Because we are using real data for this simulation (and it's not particularly well optimized code), 
# it is VERY slow. This simulation will many hours to run on most computers.
#
# Note that this creates simulation files (power.sim.csv and power.sim.interactions).
# These files have to be manually copied to the ./Data folder in order for the
# markdown file to use them (we don't copy in code: given how intensive this part of the replication is, 
# we don't want to risk accidentally overwriting with an incomplete simulation.).
#
# Please address any questions about this process to Ryan Jablonski, r.s.jablonski@lse.ac.uk
# 
########################################################################

options(warn=0)
set.seed(34685) #from random.org
rm(list=ls())

library(lfe)
library(survival)
library(dplyr)
library(ggplot2)


#the raw list of schools used to populate the maps
schools.lc=read.csv(".\\input\\school_treatment_information_lc_all_schools.csv", stringsAsFactors = FALSE)
schools.mp=read.csv(".\\input\\school_treatment_information_mp_all_schools.csv", stringsAsFactors = FALSE)
schools.lc=schools.lc[!is.na(schools.lc$winner_percent_local),]



sims=300
n.politicians=460
alpha=0.05
t.effects=seq(0.1,0.3, 0.01)


significant.experiments <- rep(NA, sims)         # Empty object to count significant experiments
coef.est <- rep(NA, sims)         # Empty object to count significant experiments
p.val <- rep(NA, sims)         # Empty object to count significant experiments

results.df=data.frame(z.coef=NA, z.pval=NA, zt.coef=NA, zt.pval=NA,
                      z.coef.clogit=NA, z.pval.clogit=NA, zt.coef.clogit=NA, zt.pval.clogit=NA, t.effect=NA,
                      z.coef.clogit.interaction=NA, z.pval.clogit.interaction=NA, zt.coef.clogit.interaction=NA, zt.pval.clogit.interaction=NA,
                      zw.coef.clogit.interaction=NA, zw.pval.clogit.interaction=NA, ztw.coef.clogit.interaction=NA, ztw.pval.clogit.interaction=NA,
                      w.effect_z=NA, w.effect_zt=NA
                      
)

for (h in 1:length(t.effects)){
  
  t.effect=0.09
  w.effect_z=0.01
  w.effect_zt=t.effects[h]
  z.effect=0.06
  
  for (i in 1:sims){
    sample.df=data.frame(z=NA, y=NA, t=NA, w=NA, y.interact=NA, map_id=NA, politician_id=NA, school_id=NA)
    
    map.id=0
    school.id=0
    
    for(j in 1:n.politicians){
      
      #assign politician to transparency treatment or control
      W.sim=rep(rbinom(n=1, size=1, prob=0.5), 3)
      
      this.wardid=NA
      repeat{  
        this.wardid= sample(schools.lc$ps_ward_id, 1)
        if(!is.na(this.wardid) & nrow(schools.lc[schools.lc$ps_ward_id==this.wardid,])>3){break}
      }
      
      #loop over maps
      for(k in 1:3){ 
        map.id=map.id+1
        school.ids=(nrow(sample.df)+1):(nrow(sample.df)+3)
        
        #assign map to treatment or control
        T.sim=rep(rbinom(n=1, size=1, prob=0.5), 3)
        
        #sample 3 schools from the ward distribution
        Z.sim= sample(schools.lc[schools.lc$ps_ward_id==this.wardid,]$school_need_index_ward, 3)
        
        #calculate treatment effect
        b1= z.effect
        b2= T.sim[1]*t.effect
        Beta=b1+b2
        
        #Calculation of denominator for probability calculation
        Denominator= sum(exp(Beta*Z.sim))
        
        #Calculating the matrix of probabilities for three choices
        vProb = cbind(exp(Z.sim[1]*Beta)/Denominator, exp(Z.sim[2]*Beta)/Denominator, exp(Z.sim[3]*Beta)/Denominator  )
        
        # Assigning the value one to maximum probability and zero for rest to get the appropriate choices for value of x
        mChoices = t(apply(vProb, 1, rmultinom, n = 1, size = 1))
        
        
        ######repeat for interaction model:##########
     
        #using terminology from the paper
        b1= z.effect
        b2=0
        b3=T.sim[1]*t.effect
        b4= 0
        b5=W.sim[1]*w.effect_z
        b6=T.sim[1]*W.sim[1]*w.effect_zt
        Beta.interact=b1+b2+b3+b4+b5+b6
        
        
        #Calculation of denominator for probability calculation
        Denominator.interact= sum(exp(Beta.interact*Z.sim))
        
        #Calculating the matrix of probabilities for three choices
        vProb.interact = cbind(exp(Z.sim[1]*Beta.interact)/Denominator, exp(Z.sim[2]*Beta.interact)/Denominator, exp(Z.sim[3]*Beta.interact)/Denominator  )
        
        # Assigning the value one to maximum probability and zero for rest to get the appropriate choices for value of x
        mChoices.interact = t(apply(vProb.interact, 1, rmultinom, n = 1, size = 1))
        
        sample.df[school.ids,]$z=Z.sim
        sample.df[school.ids,]$y=c(mChoices[1], mChoices[2], mChoices[3])
        sample.df[school.ids,]$t=T.sim
        sample.df[school.ids,]$w=W.sim
        sample.df[school.ids,]$y.interact=c(mChoices.interact[1], mChoices.interact[2], mChoices.interact[3])
        sample.df[school.ids,]$map_id=rep(map.id,3)
        sample.df[school.ids,]$politician_id=rep(j,3)
        sample.df[school.ids,]$school_id=school.ids
        
      }
    }
    
    fit.sim <- felm(y ~ z*t | map_id |0|politician_id, data=sample.df)                   
    fit.sim.clogit = clogit(as.formula(paste("y ~ z*t + strata(map_id) + cluster(politician_id)")), method="efron", sample.df)
    fit.sim.clogit.interaction = clogit(as.formula(paste("y.interact ~ z*t*w + strata(map_id) + cluster(politician_id)")), method="efron", sample.df)
    
    
    
    this.row=nrow(results.df)+1
    results.df[this.row,]$zt.pval <- summary(fit.sim)$coefficients[3,4]  
    results.df[this.row,]$zt.coef <- summary(fit.sim)$coefficients[3,1]  
    results.df[this.row,]$z.pval <- summary(fit.sim)$coefficients[1,4]  
    results.df[this.row,]$z.coef <- summary(fit.sim)$coefficients[1,1]  
    
    results.df[this.row,]$zt.pval.clogit <- summary(fit.sim.clogit)$coefficients[3,6]  
    results.df[this.row,]$zt.coef.clogit <- summary(fit.sim.clogit)$coefficients[3,1]  
    results.df[this.row,]$z.pval.clogit <- summary(fit.sim.clogit)$coefficients[1,6]  
    results.df[this.row,]$z.coef.clogit <- summary(fit.sim.clogit)$coefficients[1,1]  
    
    results.df[this.row,]$zt.pval.clogit.interaction <- summary(fit.sim.clogit.interaction)$coefficients[4,6]  
    results.df[this.row,]$zt.coef.clogit.interaction <- summary(fit.sim.clogit.interaction)$coefficients[4,1]  
    results.df[this.row,]$z.pval.clogit.interaction <- summary(fit.sim.clogit.interaction)$coefficients[1,6]  
    results.df[this.row,]$z.coef.clogit.interaction <- summary(fit.sim.clogit.interaction)$coefficients[1,1]  
    results.df[this.row,]$zw.pval.clogit.interaction <- summary(fit.sim.clogit.interaction)$coefficients[5,6]  
    results.df[this.row,]$zw.coef.clogit.interaction <- summary(fit.sim.clogit.interaction)$coefficients[5,1]  
    results.df[this.row,]$ztw.pval.clogit.interaction <- summary(fit.sim.clogit.interaction)$coefficients[7,6]  
    results.df[this.row,]$ztw.coef.clogit.interaction <- summary(fit.sim.clogit.interaction)$coefficients[7,1]
    
    results.df[this.row,]$t.effect <- t.effect
    results.df[this.row,]$w.effect_z <- w.effect_z
    results.df[this.row,]$w.effect_zt <- w.effect_zt
    
  }
  
  perc=round(h/length(t.effects)*100)
  cat(paste0(perc, "%\n"))
  
}


results.df=results.df[!is.na(results.df$zt.pval.clogit),]
write.csv(results.df, ".\\output\\power.sim.csv")
write.csv(results.df, ".\\output\\power.sim.interactions.csv")

